// Copyright 2018 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_BROWSER_CODE_CACHE_GENERATED_CODE_CACHE_H_
#define CONTENT_BROWSER_CODE_CACHE_GENERATED_CODE_CACHE_H_

#include <map>
#include <queue>

#include "base/containers/queue.h"
#include "base/files/file_path.h"
#include "base/memory/weak_ptr.h"
#include "base/timer/timer.h"
#include "content/browser/code_cache/simple_lru_cache.h"
#include "content/common/content_export.h"
#include "mojo/public/cpp/base/big_buffer.h"
#include "net/base/io_buffer.h"
#include "net/base/network_isolation_key.h"
#include "net/disk_cache/disk_cache.h"
#include "url/origin.h"

namespace content {

// Cache for storing generated code from the renderer on the disk. This cache
// uses |resource_url| + |origin_lock| as a key for storing the generated code.
// |resource_url| is the url corresponding to the requested resource.
// |origin_lock| is the origin that the renderer which requested this resource
// is locked to. This is used to enforce site isolation policy on cached code.
// For example, if SitePerProcess is enabled and http://script.com/script.js is
// requested by http://example.com, then http://script.com/script.js is the
// resource_url and http://example.com is the origin_lock.
//
// The key is generated by concatenating the serialized URL and origin lock
// with a separator in between. The separator consists of characters that are
// not valid in URLs, to prevent attacks that rely on crafted URLs.
// |origin_lock| may be empty when the renderer is not locked to an origin
// (e.g. when SitePerProcess is disabled); in such cases it is safe to use only
// |resource_url| as the key.
//
// This uses a simple disk_cache backend. It just stores one data stream and
// stores response_time + generated code as one data blob.
//
// There is one cache per storage partition, and it is owned by that storage
// partition. The cache is created, accessed and destroyed on the I/O thread.
class CONTENT_EXPORT GeneratedCodeCache {
 public:
  using ReadDataCallback =
      base::OnceCallback<void(const base::Time&, mojo_base::BigBuffer data)>;
  using GetBackendCallback = base::OnceCallback<void(disk_cache::Backend*)>;

  // Cache type. Used for collecting statistics for JS and Wasm in separate
  // buckets.
  enum CodeCacheType {
    // JavaScript from http(s) pages.
    kJavaScript,

    // WebAssembly from http(s) pages. This cache allows more total size and
    // more size per item than the JavaScript cache, since some
    // WebAssembly programs are very large.
    kWebAssembly,

    // JavaScript from chrome and chrome-untrusted pages. The resource URLs are
    // limited to only those fetched via chrome and chrome-untrusted schemes.
    // The cache size is limited to disk_cache::kMaxWebUICodeCacheSize.
    // Deduplication of very large items is disabled in this cache.
    kWebUIJavaScript,
  };

  // Used for collecting statistics about cache behaviour.
  // Since it's uploaded to UMA, its values must never change.
  enum CacheEntryStatus : uint8_t {
    kHit,
    kMiss,
    kClear,
    kUpdate,
    kCreate,
    kError,
    kIncompleteEntry,
    kWriteFailed,
    kMaxValue = kWriteFailed
  };

  // Returns the resource URL from the key. The key has the format prefix +
  // resource URL + separator + requesting origin. This function extracts and
  // returns resource URL from the key, or the empty string if key is invalid.
  static std::string GetResourceURLFromKey(const std::string& key);

  // Creates a GeneratedCodeCache with the specified path and the maximum size.
  // If |max_size_bytes| is 0, then disk_cache picks a default size based on
  // some heuristics.
  GeneratedCodeCache(const base::FilePath& path,
                     int max_size_bytes,
                     CodeCacheType cache_type);

  GeneratedCodeCache(const GeneratedCodeCache&) = delete;
  GeneratedCodeCache& operator=(const GeneratedCodeCache&) = delete;

  ~GeneratedCodeCache();

  // Runs the callback with a raw pointer to the backend. If we could not create
  // the backend then it will return a null. This runs the callback
  // synchronously if the backend is already open or asynchronously on the
  // completion of a pending backend creation.
  void GetBackend(GetBackendCallback callback);

  // Writes data to the cache. If there is an entry corresponding to
  // <|resource_url|, |origin_lock|> this overwrites the existing data. If
  // there is no entry it creates a new one.
  void WriteEntry(const GURL& resource_url,
                  const GURL& origin_lock,
                  const net::NetworkIsolationKey& nik,
                  const base::Time& response_time,
                  mojo_base::BigBuffer data);

  // Fetch entry corresponding to <resource_url, origin_lock> from the cache
  // and return it using the ReadDataCallback.
  void FetchEntry(const GURL& resource_url,
                  const GURL& origin_lock,
                  const net::NetworkIsolationKey& nik,
                  ReadDataCallback);

  // Delete the entry corresponding to <resource_url, origin_lock>
  void DeleteEntry(const GURL& resource_url,
                   const GURL& origin_lock,
                   const net::NetworkIsolationKey& nik);

  // Should be only used for tests. Sets the last accessed timestamp of an
  // entry.
  void SetLastUsedTimeForTest(const GURL& resource_url,
                              const GURL& origin_lock,
                              const net::NetworkIsolationKey& nik,
                              base::Time time,
                              base::OnceClosure callback);

  // Clears the in-memory cache.
  void ClearInMemoryCache();

  const base::FilePath& path() const { return path_; }

 private:
  class PendingOperation;

  // State of the backend.
  enum BackendState { kInitializing, kInitialized, kFailed };

  // The operation requested.
  enum Operation {
    kFetch,
    kFetchWithSHAKey,
    kWrite,
    kWriteWithSHAKey,
    kDelete,
    kGetBackend
  };

  // Data streams corresponding to each entry.
  enum { kSmallDataStream = 0, kLargeDataStream = 1 };

  // Creates a simple_disk_cache backend.
  void CreateBackend();
  void DidCreateBackend(disk_cache::BackendResult result);

  // Adds operation to the appropriate queue.
  void EnqueueOperation(std::unique_ptr<PendingOperation> op);

  // Issues ops that were received while the backend was being initialized.
  void IssuePendingOperations();
  void IssueOperation(PendingOperation* op);

  // Writes entry to cache.
  void WriteEntryImpl(PendingOperation* op);
  void OpenCompleteForWrite(PendingOperation* op,
                            disk_cache::EntryResult result);
  void WriteSmallBufferComplete(PendingOperation* op, int rv);
  void WriteLargeBufferComplete(PendingOperation* op, int rv);
  void WriteComplete(PendingOperation* op);

  // Fetches entry from cache.
  void FetchEntryImpl(PendingOperation* op);
  void OpenCompleteForRead(PendingOperation* op,
                           disk_cache::EntryResult result);
  void ReadSmallBufferComplete(PendingOperation* op, int rv);
  void ReadLargeBufferComplete(PendingOperation* op, int rv);
  void ReadComplete(PendingOperation* op);

  // Deletes entry from cache.
  void DeleteEntryImpl(PendingOperation* op);

  void DoomEntry(PendingOperation* op);

  // Issues the next operation on the queue for |key|.
  void IssueNextOperation(const std::string& key);
  // Removes |op| and issues the next operation on its queue.
  void CloseOperationAndIssueNext(PendingOperation* op);

  // Enqueues the operation issues it if there are no pending operations for
  // its key.
  void EnqueueOperationAndIssueIfNext(std::unique_ptr<PendingOperation> op);
  // Dequeues the operation and transfers ownership to caller.
  std::unique_ptr<PendingOperation> DequeueOperation(PendingOperation* op);

  void DoPendingGetBackend(PendingOperation* op);

  void OpenCompleteForSetLastUsedForTest(base::Time time,
                                         base::OnceClosure callback,
                                         disk_cache::EntryResult result);

  void CollectStatistics(GeneratedCodeCache::CacheEntryStatus status);

  // Whether very large cache entries are deduplicated in this cache.
  // Deduplication is disabled in the WebUI code cache, as an additional defense
  // against privilege escalation in case there is a bug in the deduplication
  // logic.
  bool IsDeduplicationEnabled() const;

  bool ShouldDeduplicateEntry(uint32_t data_size) const;

  // Checks that the header data in the small buffer is valid. We may read cache
  // entries that were written by a previous version of Chrome which uses
  // obsolete formats. These reads should fail and be doomed as soon as
  // possible.
  bool IsValidHeader(scoped_refptr<net::IOBufferWithSize> small_buffer) const;

  void ReportPeriodicalHistograms();

  std::unique_ptr<disk_cache::Backend> backend_;
  BackendState backend_state_;

  // Queue for operations received while initializing the backend.
  using PendingOperationQueue = base::queue<std::unique_ptr<PendingOperation>>;
  PendingOperationQueue pending_ops_;

  // Map from key to queue of pending operations.
  std::map<std::string, PendingOperationQueue> active_entries_map_;

  base::FilePath path_;
  int max_size_bytes_;
  CodeCacheType cache_type_;

  // A hypothetical memory-backed code cache. Used to collect UMAs.
  SimpleLruCache lru_cache_;
  base::RepeatingTimer histograms_timer_;
  static constexpr int64_t kLruCacheCapacity = 50 * 1024 * 1024;

  base::WeakPtrFactory<GeneratedCodeCache> weak_ptr_factory_{this};
};

}  // namespace content

#endif  // CONTENT_BROWSER_CODE_CACHE_GENERATED_CODE_CACHE_H_
